################################################################################
#
#                       美国民粹主义外交政策的民意基础                       
#
#                            付 舒    2025年10月     
#
################################################################################


#####（三）跨问卷插补  ######

library(tidyverse)
library(stargazer)
library(randomForest)
library(ggthemes)
library(showtextdb)
library(sysfonts)
library(showtext)
showtext_auto()
font_families()
font_add("Times","/System/Library/Fonts/Supplemental/Times\ New\ Roman.ttf")
font_add("Songti SC","/System/Library/Fonts/Songti.ttc")


# OLS prediction
lm_model_poli <- lm(popu_poli ~ cv_vote2020trump + cv_ideology + cv_party + 
                                   cv_religion_catho + cv_religion_chris + 
                                   cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                   cv_race_white + cv_race_latin +
                                   cv_education + cv_income,
                 data = anes2020_popu)
poli_pred_lm <- predict(lm_model_poli, newdata = ccga2022)

lm_model_econ <- lm(popu_econ ~ cv_vote2020trump + cv_ideology + cv_party + 
                                   cv_religion_catho + cv_religion_chris + 
                                   cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                   cv_race_white + cv_race_black + cv_race_latin +
                                   cv_education + cv_income,
                    data = anes2020_popu)
econ_pred_lm <- predict(lm_model_econ, newdata = ccga2022)

lm_model_cult <- lm(popu_cult ~ cv_vote2020trump + cv_ideology + cv_party + 
                                   cv_religion_catho + cv_religion_chris + 
                                   cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                   cv_race_white + cv_race_black + cv_race_latin +
                                   cv_education + cv_income,
                    data = anes2020_popu)
cult_pred_lm <- predict(lm_model_cult, newdata = ccga2022)

stargazer(lm_model_poli, lm_model_econ, lm_model_cult,
          type = "text", 
          omit.stat = c("adj.rsq", "f", "ser"),
          dep.var.labels = c("Political Populism", "Economic Populism", "Cultural Populism"),
          column.separate = c(1, 1, 1),
          dep.var.caption = "Dependent Variable")



# fully saturated model
fs_model_poli <- lm(popu_poli ~  (cv_vote2020trump + cv_ideology + cv_party + 
                      cv_religion_catho + cv_religion_chris + 
                      cv_houseowner + cv_female + cv_age + cv_marital_mar +
                      cv_race_white + cv_race_black + cv_race_latin +
                      cv_education + cv_income) ^ 2,
                    data = anes2020_popu)
poli_pred_fs <- predict(fs_model_poli, newdata = ccga2022)

fs_model_econ <- lm(popu_econ ~ (cv_vote2020trump + cv_ideology + cv_party + 
                      cv_religion_catho + cv_religion_chris + 
                      cv_houseowner + cv_female + cv_age + cv_marital_mar +
                      cv_race_white + cv_race_black + cv_race_latin +
                      cv_education + cv_income) ^ 2,
                    data = anes2020_popu)
econ_pred_fs <- predict(fs_model_econ, newdata = ccga2022)

fs_model_cult <- lm(popu_cult ~ (cv_vote2020trump + cv_ideology + cv_party + 
                      cv_religion_catho + cv_religion_chris +
                      cv_houseowner + cv_female + cv_age + cv_marital_mar +
                      cv_race_white + cv_race_black + cv_race_latin +
                      cv_education + cv_income) ^ 2,
                    data = anes2020_popu)
cult_pred_fs <- predict(fs_model_cult, newdata = ccga2022)




# random forest
rf_model_poli <- randomForest(popu_poli ~ cv_vote2020trump + cv_ideology + cv_party +  
                                cv_religion_catho + cv_religion_chris + 
                                cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                cv_race_white + cv_race_black + cv_race_latin +
                                cv_education + cv_income, 
                              ntree = 100,
                              data = anes2020_popu)
poli_pred_rf <- predict(rf_model_poli, newdata = ccga2022)

rf_model_econ <- randomForest(popu_econ ~ cv_vote2020trump + cv_ideology + cv_party +    
                                cv_religion_catho + cv_religion_chris + 
                                cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                cv_race_white + cv_race_black + cv_race_latin +
                                cv_education + cv_income, 
                              ntree = 100,
                              data = anes2020_popu)
econ_pred_rf <- predict(rf_model_econ, newdata = ccga2022)

rf_model_cult <- randomForest(popu_cult ~ cv_vote2020trump + cv_ideology + cv_party +   
                                cv_religion_catho + cv_religion_chris + 
                                cv_houseowner + cv_female + cv_age + cv_marital_mar +
                                cv_race_white + cv_race_black + cv_race_latin +
                                cv_education + cv_income, 
                              ntree = 100,
                              data = anes2020_popu)
cult_pred_rf <- predict(rf_model_cult, newdata = ccga2022)


normalize01 <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}


ccga2022_popu <- ccga2022 %>%
  mutate(poli_pred_lm = normalize01(poli_pred_lm),
         econ_pred_lm = normalize01(econ_pred_lm),
         cult_pred_lm = normalize01(cult_pred_lm)) %>%
  mutate(poli_pred_fs = normalize01(poli_pred_fs),
         econ_pred_fs = normalize01(econ_pred_fs),
         cult_pred_fs = normalize01(cult_pred_fs)) %>%
  mutate(poli_pred_rf = normalize01(poli_pred_rf),
         econ_pred_rf = normalize01(econ_pred_rf),
         cult_pred_rf = normalize01(cult_pred_rf))



# 图1  2022年CCGA跨问卷插补的民粹指数分布

ccga2022_popu_figure <- ccga2022_popu %>%
  pivot_longer(
    cols = c(poli_pred_lm, econ_pred_lm, cult_pred_lm,
             poli_pred_fs, econ_pred_fs, cult_pred_fs,
             poli_pred_rf, econ_pred_rf, cult_pred_rf),
    names_to = c("dimension", "model"),
    names_pattern = "(.*)_pred_(.*)",
    values_to = "pred"
  ) %>%
  dplyr::select(dimension, model, pred) %>%
  na.omit() %>%
  mutate(dimension = case_when(dimension == "cult" ~ "文化维度",
                               dimension == "econ" ~ "经济维度",
                               dimension == "poli" ~ "政治维度")) %>%
  mutate(dimension = factor(dimension, level = c("政治维度", "经济维度", "文化维度"))) %>%
  mutate(model = case_when(model == "lm" ~ "最小二乘",
                           model == "fs" ~ "完全饱和",
                           model == "rf" ~ "随机森林")) %>%
  mutate(model = factor(model, level = c("最小二乘", "完全饱和", "随机森林")))
  
figure1 <- ccga2022_popu_figure %>%
  ggplot(aes(x = pred, linetype = model)) +
  geom_density(aes(y = after_stat(count) / 9), key_glyph = "path") +
  facet_wrap(~ dimension, ncol = 3) + 
  scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  scale_x_continuous(labels = c("0", "0.25", "0.5", "0.75", "1")) +
  labs(x = "民粹指数", y = "人数", linetype = "模型    ") +
  theme_few() +
  theme(legend.position = "bottom")

figure1


ggsave("图1.pdf", 
       plot = figure1, 
       width = 9, 
       height = 4, 
       units = "in", 
       device = "pdf")
